clear all
set more off
cd /Users/zimaoxiao/Dropbox/CC_Yield_Predict/replication_package/  // Navigate to replication folder on your own machine

cap mkdir output

qui {		
* prepare the data for making scatter plots	
use county_fips year using data/weather/weather_processed_current, clear
merge 1:1 county_fips year using data/yield/yields_1950_2022, keep(3) nogenerate
merge m:1 county_fips using data/geo/us_county_coordinates, keep(3) keepusing(longitude state_name) nogenerate

* keep western counties and clean yield data
drop if longitude<-100 | longitude==.
gen lncornyield=log(corn_yield)
drop if lncornyield<=0

* prepare the data for regression
gen trend=year-1949 // 1950 as year 1
gen trend2=trend^2
gen StateXYear=state_fips*year

* run the regression to form yield predictions
reghdfe lncornyield state_fips#c.trend state_fips#c.trend2 [aweight=corn_areaHarv], ///
absorb(county_fips) vce(cluster county_fips StateXYear) res(r)	

* get fitted yields	
predict lncornyield_hat, xbd

* plot Figure 1 (top-10 states) 
preserve
	keep if year==2020 // rank according to the corn area in 2020
	bys state_fips: egen acres=sum(corn_areaHarv)
	bys state_fips: keep if _n==1
	gsort - acres
	gen ranking=_n
	keep state_fips state_name ranking
	tempfile acreranking
	save `acreranking', replace
restore
merge m:1 state_fips using `acreranking', keep(3) nogenerate
keep if ranking<=10 
gsort + ranking

* 1-10: Iowa, Illinois, Nebraska, Minnesota, Indiana, South Dakota, Ohio, Missouri, Kansas, Wisconsin
preserve
bys state_name: keep if _n==1
gsort + ranking
noi list state_name in 1/10
restore

twoway scatter lncornyield year, msymbol(dot) mcolor(gs14) msize(vtiny) || ///
qfit lncornyield_hat year, col(navy) ///
by(ranking) ///
ytitle("log(Corn Yields)") xtitle("Year")
noi graph export output/Figure1.jpg, as(jpg) name("Graph") quality(100) replace

}

*** EOF
